Observed data

Observed log-chlorophyll at representative station in SF Bay Delta region.

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# flow data, left moving window average of 30 days
data(sf_fldat)
fl_dat <- sf_fldat %>% 
  rename(date = Date) %>% 
  filter(station %in% 'sac') %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )
  
# format the data to model
data(sf_dat)
sf_mod <- sf_dat %>% 
  filter(Site_Code %in% 'C3') %>% 
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    mo = month(date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'date') %>% 
  mutate( # all variables ln-transformed
    flo = log(qsm), 
    chl = log(chl), 
    tss = log(tss), 
    nh = log(nh)
    ) %>% 
  select(-q, -qsm, -station, -Latitude, -Longitude, -Location)

# plot, all
p <- ggplot(sf_mod, aes(x = date, y = chl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_mod, aes(x = yr, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_mod, aes(x = mo, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Adding nitrogen and turbidity covariates

# formula for best annual, seasonal, flow model
strt <- best2$formula %>% 
  as.character

smths <- c(
  "s(nh, bs = 'tp')",  
  "s(tss, bs = 'tp')",
  "te(nh, tss, bs = c('tp', 'tp'))"
)

# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
  
  # for the summary table  
  frmtab <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ')
      })
  
  # for the model
  frm <- sapply(frmtab, function(x){  
        paste('chl ~', strt[3], '+', x) %>% 
          formula
        })
  
  frmstab <- c(frmstab, frmtab)
  frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sf_mod, 
    na.action = na.exclude
  )

})
names(mods3) <- paste0('mod', seq_along(mods3))

Summary of all nutrient, tss models:

# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
mod1 s(dec_time) 6.7326765 7.787103 7.4425304 0.0000000
mod1 s(flo) 3.1028428 3.960042 3.0934249 0.0128012
mod1 te(flo,dec_time) 11.1034028 16.000000 1.7440653 0.0000018
mod1 te(dec_time,doy,flo) 36.5082544 75.000000 3.7440972 0.0000000
mod1 s(nh) 1.0000000 1.000000 10.7589384 0.0011106
mod2 s(dec_time) 6.7631403 7.808429 3.5705014 0.0006671
mod2 s(flo) 4.5555525 5.606869 0.5160503 0.8094488
mod2 te(flo,dec_time) 6.3101387 16.000000 0.8707123 0.0001731
mod2 te(dec_time,doy,flo) 29.4900693 75.000000 3.2696992 0.0000000
mod2 s(tss) 4.4643812 5.534840 6.3307603 0.0000057
mod3 s(dec_time) 7.1607042 8.159000 3.0195017 0.0021427
mod3 s(flo) 1.0000000 1.000000 1.0117026 0.3149976
mod3 te(flo,dec_time) 6.7124424 16.000000 0.9168085 0.0003409
mod3 te(dec_time,doy,flo) 40.9268267 75.000000 3.6832771 0.0000000
mod3 te(nh,tss) 2.9999967 3.000000 10.2798312 0.0000014
mod4 s(dec_time) 7.0804950 8.099234 3.0429996 0.0021289
mod4 s(flo) 1.0000000 1.000000 1.0629383 0.3030597
mod4 te(flo,dec_time) 6.8780322 16.000000 0.9204817 0.0003869
mod4 te(dec_time,doy,flo) 41.2381557 75.000000 3.8211715 0.0000000
mod4 s(nh) 1.0000000 1.000000 1.6753508 0.1961569
mod4 s(tss) 1.0000000 1.000000 20.7089185 0.0000067
mod5 s(dec_time) 7.1604176 8.153588 2.8342646 0.0038231
mod5 s(flo) 1.0000000 1.000000 0.8126575 0.3677865
mod5 te(flo,dec_time) 6.3716705 16.000000 0.8176419 0.0006966
mod5 te(dec_time,doy,flo) 40.9862091 75.000000 3.6871420 0.0000000
mod5 s(nh) 2.9416732 3.677488 0.6073391 0.6762830
mod5 te(nh,tss) 2.2871431 20.000000 0.8529033 0.0000035
mod6 s(dec_time) 6.8672780 7.929956 2.9102143 0.0048115
mod6 s(flo) 1.0000001 1.000000 0.5453671 0.4605768
mod6 te(flo,dec_time) 6.8057389 16.000000 0.9049012 0.0004014
mod6 te(dec_time,doy,flo) 40.6963145 75.000000 3.6503971 0.0000000
mod6 s(tss) 3.7996742 4.785158 6.0630463 0.0000382
mod6 te(nh,tss) 0.1672564 20.000000 0.0093602 0.2632117
mod7 s(dec_time) 7.0804950 8.099234 3.0429996 0.0021289
mod7 s(flo) 1.0000000 1.000000 1.0629383 0.3030597
mod7 te(flo,dec_time) 6.8780322 16.000000 0.9204817 0.0003869
mod7 te(dec_time,doy,flo) 41.2381557 75.000000 3.8211715 0.0000000
mod7 s(nh) 1.0000000 1.000000 1.6753508 0.1961569
mod7 s(tss) 1.0000000 1.000000 20.7089185 0.0000067
mod7 te(nh,tss) 0.0000000 16.000000 0.0000000 1.0000000
# summary stats of GAMs
map(mods3, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  mutate(smth_added = frmstab) %>%
  select(name, smth_added, everything()) %>% 
  kable
name smth_added AIC R2
mod1 s(nh, bs = ‘tp’) 868.6768 0.5897820
mod2 s(tss, bs = ‘tp’) 854.2826 0.5884729
mod3 te(nh, tss, bs = c(‘tp’, ‘tp’)) 843.6342 0.5967456
mod4 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) 842.2550 0.5973803
mod5 s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 845.4859 0.5966324
mod6 s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 843.7245 0.5970284
mod7 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 842.2550 0.5973803

Summary stats of best three three models:

# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>% 
  unlist %>% 
  which.max %>% 
  mods3[[.]] 

best <- list(best1 = best1, best2 = best2, best3 = best3)

# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
best1 s(dec_time) 7.332864 8.308760 9.2780106 0.0000000
best1 s(doy) 3.385611 8.000000 5.2844996 0.0000000
best1 te(dec_time,doy) 6.637691 15.000000 1.4177080 0.0003557
best2 s(dec_time) 6.447441 7.549442 4.6515592 0.0000330
best2 s(flo) 3.199187 4.047691 1.9311669 0.0935574
best2 te(flo,dec_time) 11.199615 16.000000 1.8463772 0.0000008
best2 te(dec_time,doy,flo) 37.046429 75.000000 3.6900576 0.0000000
best3 s(dec_time) 7.080495 8.099234 3.0429996 0.0021289
best3 s(flo) 1.000000 1.000000 1.0629383 0.3030597
best3 te(flo,dec_time) 6.878032 16.000000 0.9204817 0.0003869
best3 te(dec_time,doy,flo) 41.238156 75.000000 3.8211715 0.0000000
best3 s(nh) 1.000000 1.000000 1.6753508 0.1961569
best3 s(tss) 1.000000 1.000000 20.7089185 0.0000067
best3 te(nh,tss) 0.000000 16.000000 0.0000000 1.0000000
# summary stats of GAMs
map(best, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
best1 971.9230 0.4755770
best2 880.8418 0.5852104
best3 842.2550 0.5973803
# predictions
sf_res3 <- map(best, function(x){
  sf_mod %>% 
    mutate(
      pred = predict(x)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sf_res3, aes(x = date)) + 
  geom_point(data = sf_mod, aes(y = chl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)